
********************************************************************************
********************************************************************************
*************************** THIS IS DO FILE 1 **********************************
********* THIS FILE MERGES THE GROUP INFORMATION INTO THE IQVIA DATA  **********
********************************************************************************
********************************************************************************

*Version: 2023 March 29

// Make sure the log directories exist. -capture- swallows the error that
// -mkdir- raises when a directory is already present.
capture mkdir `"$output/log"'
capture mkdir `"$output/log/1_data_merge"'

// Close any log left open by an earlier run, then start this file's log.
capture log close
log using `"$output/log/1_data_merge/1_data_merge.smcl"', replace

// Record the code version in the log.
display "This run uses code version from 2023/03/29"


// Build group size measures from the across-state data in two passes:
//   pass 1: one row per (group, year) holding the group's average yearly size,
//           saved to $x_state_size_merge;
//   pass 2: reload the full data and merge that average back on, saving the
//           result to $x_state_file_size.
// The group identifier is constructed identically in both passes so that the
// merge keys line up.

// ---- Pass 1: average group size --------------------------------------------
use `"$raw_input/$x_state_file"', clear

// Group identifier: the tax ID (prefixed) when present, otherwise the numeric ID.
gen skaGroup3a=string(ID)
replace skaGroup3a="MDPPASTIN"+tin  if ~missing(tin)
// string() converts a missing ID to ".", not to an empty string. Without this
// line every provider lacking both a tax ID and an ID would be pooled into a
// single group named "." and the "unidentified group" handling below would
// never apply.
replace skaGroup3a="" if missing(ID) & missing(tin)

// Only use groups we can identify: count providers per group-year, keep one
// row per group-year, then average the yearly counts within each group.
drop if skaGroup3a == ""
bys skaGroup3a year: gen phys_year = _N
bys skaGroup3a year: keep if _n == 1

bys skaGroup3a: gegen ms_group3 = mean(phys_year)
lab var ms_group3 "Average Size of Group 3 (Tax ID)"

sum ms_group3, d


// Save average group size so it can be merged back on to the data
keep skaGroup3a year ms_group3
sort skaGroup3a year
save `"$raw_input/$x_state_size_merge"', replace

// ---- Pass 2: merge the average size back on to the full data ----------------
use `"$raw_input/$x_state_file"', clear

// Same group identifier as in pass 1 (including the missing-ID correction).
gen skaGroup3a=string(ID)
replace skaGroup3a="MDPPASTIN"+tin  if ~missing(tin)
replace skaGroup3a="" if missing(ID) & missing(tin)

// Providers per group-year; providers without an identifiable group are
// treated as a group of one.
bys skaGroup3a year: gen phys_year = _N
replace phys_year=1 if skaGroup3a == ""


sort skaGroup3a year
merge m:1 skaGroup3a year using `"$raw_input/$x_state_size_merge"'
drop _merge

// Rows that found no match in the size file (the unidentified groups) get size 1.
replace phys_year=1 if missing(phys_year)
replace ms_group3=1 if missing(ms_group3)
summ ms_group3, detail

save `"$raw_input/$x_state_file_size"', replace




//Take detailed provider file from SK&A and keep linking variables - NPI and idw_provider_id
use `"$raw_input/$provider_file"', clear
// Store the numeric NPI as double: a 10-digit NPI can exceed the largest value
// a long can hold (2,147,483,620), in which case it would silently become
// missing and be dropped below. Doubles hold 10-digit integers exactly.
gen double NPI=real(npi)
format NPI %10.0f
sort NPI
drop if missing(NPI)
*13 providers are missing NPI
summ age, detail

// Attach county information by ZIP code; keep every provider row, discard
// ZIP codes that appear only in the crosswalk.
gen ZIP=zipcode
sort ZIP
merge m:1 ZIP using `"$geography_files/$zip_to_county"' 
drop if _merge==2
drop _merge 

*116 are missing ZIP
keep NPI idw_provider_id COUNTY zipcode

sort NPI
save `"$raw_input/provider_mapping"', replace




//Link to cross state group data with group size information and add idw_provider_id
use `"$raw_input/$x_state_file_size"', clear 

order NPI 
order npi

sort NPI

merge m:1 NPI using `"$raw_input/provider_mapping"'
tab _merge
// Fill a blank COUNTY from FIPS only where FIPS is populated: string(.) is ".",
// which would otherwise overwrite a missing county with a bogus non-missing code.
replace COUNTY=string(FIPS) if missing(COUNTY) & ~missing(FIPS)

// Keep only providers present in both the group data and the provider mapping.
keep if _merge==3
drop _merge

// 1 if medicare == "Y", 0 for any other non-missing value, missing otherwise.
gen medicare_acceptinsurance=medicare=="Y" if ~missing(medicare)

keep  idw_provider_id year age ms_group3 phys_year skaGroup3a zipcode COUNTY ///
	sex medicare_acceptinsurance acceptinsurance
sort idw_provider_id year


save `"$raw_input/$x_state_file_size_provider"', replace				
	

// Attach the provider-year group size variables to the regression data.
// Every row of the regression data is retained; provider-years that exist
// only in the group size file are discarded.
use `"$output/data/$line_file_temp"', clear 
sort idw_provider_id year
merge m:1 idw_provider_id year using `"$raw_input/$x_state_file_size_provider"'
keep if _merge != 2
drop _merge

// Group size indicator: 1 when the average group size is at or below
// $group_threshold, 2 when it is above; left missing when the size is missing.
gen size_indicator = cond(ms_group3 <= $group_threshold, 1, 2) if ms_group3 != .
label variable size_indicator "Group Size Indicator"

// Rows without a group size cannot be classified and are removed.
keep if size_indicator != .

save `"$output/data/$line_file"', replace 



log close
